In [1]:
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
from plotly import graph_objs as go
import plotly.express as px
import plotly.offline as pyo
from plotly.subplots import make_subplots
import seaborn as sns


from scipy import stats


import tensorflow as tf
import keras
import keras.utils

from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, RepeatVector, TimeDistributed
from keras import regularizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

Task 1: Data Loading

The input file "machine_0.csv" is used to find anomalies in the machine state.

In [2]:
data = pd.read_csv('data/machine_0.csv')
data.columns = ['Time', 'f1', 'f2','f3','f4']
data['Time'] = pd.to_datetime(data['Time'])

data['Time'] = data['Time'].dt.floor('s')  # truncate timestamps to whole seconds
data.head()
Out[2]:
Time f1 f2 f3 f4
0 2019-01-01 00:00:00 12.626096 8.803120 -11.809200 10.083961
1 2019-01-01 08:00:09 10.831994 2.816327 11.554778 21.892853
2 2019-01-01 16:00:19 21.083510 -0.672645 -17.839178 -1.349024
3 2019-01-02 00:00:28 32.294495 6.525132 -13.498586 -4.250752
4 2019-01-02 08:00:38 28.057100 3.691359 21.984744 13.670561
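
As a quick sanity check (not part of the original run), the gaps between consecutive timestamps can be inspected to confirm the sampling interval:

# Illustrative check: distribution of time deltas between consecutive readings
print(data['Time'].diff().describe())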
In [3]:
fig = make_subplots(rows = 2, cols=2)
fig.add_trace(go.Scatter(x=data.index, y=data["f1"],name='Feature 1'), row=1, col=1)
fig.add_trace(go.Scatter(x=data.index, y=data["f2"],name='Feature 2'), row=1, col=2)
fig.add_trace(go.Scatter(x=data.index, y=data["f3"],name='Feature 3'), row=2, col=1)
fig.add_trace(go.Scatter(x=data.index, y=data["f4"],name='Feature 4'), row=2, col=2)

fig.update_layout(title_text='Features')
fig.update_layout(annotations=[ dict(
            x=0.5,
            y=-0.15,
            showarrow=False,
            text="Data Index",
            xref="paper",
            yref="paper"
        ),
        dict(
            x=-0.07,
            y=0.5,
            showarrow=False,
            text="Features Value Range",
            textangle=-90,
            xref="paper",
            yref="paper"
        )
    ])
pyo.iplot(fig)

Task 2: Removing Noise from the Data

Moving average

   This function accepts a series and a window size and returns the moving average of the series, aligned to the same indices.
In [4]:
def ma(series, win_size):
    # Centered moving average via convolution; mode='same' keeps the output
    # aligned with the input index (the edges are effectively zero-padded)
    return pd.Series(data=np.convolve(a=series, v=np.ones(int(win_size))/float(win_size),
                                      mode='same'), index=series.index)
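A quick illustration on a toy series (the values are made up) shows how mode='same' keeps the output aligned with the input index, with edge effects at both ends where the window runs off the series and is zero-padded:

s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
print(ma(s, 3))  # centered 3-point average: [1.0, 2.0, 3.0, 4.0, 3.0]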
In [5]:
N = 3  # number of standard deviations used for the control limits

def threshold(series, lam):
    # Control limits: mean +/- N * std * sqrt(lam / (2 - lam)),
    # broadcast across the series index
    upper = pd.Series(data=(series.mean()+(N*series.std()*(lam/(2-lam))**0.5)), index=series.index)
    lower = pd.Series(data=(series.mean()-(N*series.std()*(lam/(2-lam))**0.5)), index=series.index)

    return upper, lower
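These are EWMA-style control-chart limits: upper/lower = mean(x) ± N · std(x) · sqrt(λ / (2 − λ)), where λ is the smoothing factor passed as lam. With λ = 0.9 the scaling factor is sqrt(0.9 / 1.1) ≈ 0.90, so the band is slightly tighter than a plain ±3σ band.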
In [6]:
data_ma= pd.DataFrame()
data_ma['Time'] = data['Time']
data_ma['f1'] = ma(data['f1'],20)
data_ma['f2'] = ma(data['f2'],20)
data_ma['f3'] = ma(data['f3'],20)
data_ma['f4'] = ma(data['f4'],20)
data_ma.head()
Out[6]:
Time f1 f2 f3 f4
0 2019-01-01 00:00:00 13.061998 0.698791 1.274844 -0.407471
1 2019-01-01 08:00:09 26.762524 0.789543 2.266488 -13.447164
2 2019-01-01 16:00:19 27.232547 -11.988773 3.962774 -13.531214
3 2019-01-02 00:00:28 27.635057 -12.354438 3.919560 -14.703468
4 2019-01-02 08:00:38 27.890448 0.155663 4.233751 -15.596987
In [7]:
fig = make_subplots(rows = 4, cols=1)
fig.add_trace(go.Scatter(x=data.index, y=data["f1"],name='Feature 1',line = dict( width=1, dash='dot')), row=1, col=1)
fig.add_trace(go.Scatter(x=data_ma.index, y=data_ma["f1"],name='Denoised Feature 1',line = dict(width=2 )), row=1, col=1)
fig.add_trace(go.Scatter(x=data.index, y=data["f2"],name='Feature 2',line = dict( width=1, dash='dot')), row=2, col=1)
fig.add_trace(go.Scatter(x=data_ma.index, y=data_ma["f2"],name='Denoised Feature 2',line = dict(width=2 )), row=2, col=1)
fig.add_trace(go.Scatter(x=data.index, y=data["f3"],name='Feature 3',line = dict( width=1, dash='dot')), row=3, col=1)
fig.add_trace(go.Scatter(x=data_ma.index, y=data_ma["f3"],name='Denoised Feature 3',line = dict(width=2 )), row=3, col=1)
fig.add_trace(go.Scatter(x=data.index, y=data["f4"],name='Feature 4',line = dict( width=1, dash='dot')), row=4, col=1)
fig.add_trace(go.Scatter(x=data_ma.index, y=data_ma["f4"],name='Denoised Feature 4',line = dict(width=2 )), row=4, col=1)
fig.update_layout(title_text='Machine Sensor Data')
fig.update_layout(annotations=[ dict(
            x=0.5,
            y=-0.15,
            showarrow=False,
            text="Data Index",
            xref="paper",
            yref="paper"
        ),
        dict(
            x=-0.07,
            y=0.5,
            showarrow=False,
            text="Features Value Range",
            textangle=-90,
            xref="paper",
            yref="paper"
        )
    ])
pyo.iplot(fig)
In [8]:
data_ma_diff = data_ma.diff(20)
data_ma_diff_thresh_f1= threshold(data_ma_diff['f1'], 0.9)
data_ma_diff_thresh_f2= threshold(data_ma_diff['f2'], 0.9)
data_ma_diff_thresh_f3= threshold(data_ma_diff['f3'], 0.9)
data_ma_diff_thresh_f4= threshold(data_ma_diff['f4'], 0.9)

#individual feature figure

fig_ind = go.Figure()
fig_ind.add_trace(go.Scatter(x=data.index, y=data['f1'], name='Feature 1',
                         line = dict( width=1, dash='dot', color='black')))
fig_ind.add_trace(go.Scatter(x=data.index, y=data_ma['f1'], name='Denoised Feature 1',
                         line = dict( width=1,color='blue')))
fig_ind.add_trace(go.Scatter(x=data.index, y=data_ma_diff_thresh_f1[0], name='upper limit',
                         line = dict( width=2, color='red')))
fig_ind.add_trace(go.Scatter(x=data.index, y=data_ma_diff_thresh_f1[1], name='lower limit',
                         line = dict( width=2, color='red')))
fig_ind.update_layout(title_text='Machine Data Feature 1',
                     xaxis_title="Data Index",
                 yaxis_title="Features Value Range"
                 )
pyo.iplot(fig_ind)
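As a quick follow-up (illustrative, not part of the original run), one can count how many differenced points fall outside the limits for a feature:

upper, lower = data_ma_diff_thresh_f1
outside = ((data_ma_diff['f1'] > upper) | (data_ma_diff['f1'] < lower)).sum()
print('f1 points outside the limits:', outside)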
In [10]:
## LSTM Autoencoder Neural Networks

### Task 3 : Creating training and testing dataset
In [11]:
k = data_ma

# 70/30 chronological train/test split (no shuffling, to preserve time order)
train_size = int(len(k) * 0.7)
test_size = len(k) - train_size
train, test = k.iloc[0:train_size], k.iloc[train_size:len(k)]

X_train = np.array(train[['f1','f2','f3','f4']])
X_test = np.array(test[['f1','f2','f3','f4']])

# Standardize with z-scores, since the raw feature value ranges are large
X_train = stats.zscore(X_train)
X_test = stats.zscore(X_test)

# Reshape to (samples, timesteps, features) for the LSTM: one timestep per sample
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
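Here each sample is a single timestep, giving inputs of shape (samples, 1, 4). An alternative, sketched below with an assumed window length of 30 (this notebook does not use it), is to feed the LSTM overlapping subsequences so it can exploit temporal context:

def create_sequences(values, time_steps=30):  # time_steps is an illustrative choice
    # Stack overlapping windows into shape (n_windows, time_steps, n_features)
    return np.stack([values[i:i + time_steps]
                     for i in range(len(values) - time_steps + 1)])

# e.g. X_train_seq = create_sequences(stats.zscore(np.array(train[['f1','f2','f3','f4']])))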
In [12]:
### Task 4 : Build an LSTM Autoencoder
In [13]:
def LSTM_NN(X):
    # Encoder: compress each input window into a 64-dim latent vector
    input_shape = Input(shape=(X.shape[1], X.shape[2]))
    LL1 = LSTM(128, activation='relu', return_sequences=True,
               kernel_regularizer=regularizers.l2(0.00))(input_shape)  # l2(0.0) is effectively no regularization
    LL2 = LSTM(64, activation='relu', return_sequences=False)(LL1)
    # Decoder: repeat the latent vector and reconstruct the input sequence
    LL3 = RepeatVector(X.shape[1])(LL2)
    LL4 = LSTM(64, activation='relu', return_sequences=True)(LL3)
    LL5 = LSTM(128, activation='relu', return_sequences=True)(LL4)
    out = TimeDistributed(Dense(X.shape[2]))(LL5)
    model = Model(inputs=input_shape, outputs=out)

    return model
In [14]:
model = LSTM_NN(X_train)
model.compile(optimizer='adam', loss='mae')
model.summary()
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 1, 4)]            0         
_________________________________________________________________
lstm (LSTM)                  (None, 1, 128)            68096     
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                49408     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 1, 64)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 1, 64)             33024     
_________________________________________________________________
lstm_3 (LSTM)                (None, 1, 128)            98816     
_________________________________________________________________
time_distributed (TimeDistri (None, 1, 4)              516       
=================================================================
Total params: 249,860
Trainable params: 249,860
Non-trainable params: 0
_________________________________________________________________
In [15]:
### Task 5 : Train the Autoencoder
In [16]:
n_epoch = 100
batch = 20
# Stop early once validation loss stops improving for 3 consecutive epochs
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode='min')
history = model.fit(X_train, X_train,   # autoencoder: the input reconstructs itself
                    epochs=n_epoch, 
                    batch_size = batch,
                    validation_split = 0.05,   # last 5% of samples held out for validation
                    callbacks = [es],
                    shuffle=False).history
Epoch 1/100
100/100 [==============================] - 2s 18ms/step - loss: 0.7007 - val_loss: 0.6881
Epoch 2/100
100/100 [==============================] - 1s 6ms/step - loss: 0.6810 - val_loss: 0.6060
Epoch 3/100
100/100 [==============================] - 1s 6ms/step - loss: 0.6357 - val_loss: 0.6199
Epoch 4/100
100/100 [==============================] - 1s 6ms/step - loss: 0.5582 - val_loss: 0.4777
Epoch 5/100
100/100 [==============================] - 1s 6ms/step - loss: 0.4896 - val_loss: 0.4384
Epoch 6/100
100/100 [==============================] - 1s 6ms/step - loss: 0.4060 - val_loss: 0.3418
Epoch 7/100
100/100 [==============================] - 1s 6ms/step - loss: 0.3617 - val_loss: 0.2787
Epoch 8/100
100/100 [==============================] - 1s 6ms/step - loss: 0.3181 - val_loss: 0.2450
Epoch 9/100
100/100 [==============================] - 1s 6ms/step - loss: 0.2692 - val_loss: 0.1653
Epoch 10/100
100/100 [==============================] - 1s 6ms/step - loss: 0.1975 - val_loss: 0.1188
Epoch 11/100
100/100 [==============================] - 1s 5ms/step - loss: 0.1580 - val_loss: 0.1067
Epoch 12/100
100/100 [==============================] - 1s 6ms/step - loss: 0.1111 - val_loss: 0.0790
Epoch 13/100
100/100 [==============================] - 1s 6ms/step - loss: 0.1057 - val_loss: 0.1145
Epoch 14/100
100/100 [==============================] - 1s 6ms/step - loss: 0.0909 - val_loss: 0.0883
Epoch 15/100
100/100 [==============================] - 1s 6ms/step - loss: 0.0830 - val_loss: 0.0789
Epoch 16/100
100/100 [==============================] - 1s 6ms/step - loss: 0.0813 - val_loss: 0.0807
Epoch 17/100
100/100 [==============================] - 1s 5ms/step - loss: 0.0770 - val_loss: 0.0831
Epoch 18/100
100/100 [==============================] - 1s 5ms/step - loss: 0.0870 - val_loss: 0.0860
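Although the notebook does not persist the model, the trained autoencoder could be saved for reuse with the standard Keras call (the file name is illustrative):

model.save('lstm_autoencoder.h5')  # reload later with keras.models.load_model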
In [17]:
### Plot metrics 
In [18]:
plt.plot(history['loss'], label='Training Loss')
plt.plot(history['val_loss'], label='Validation Loss')
plt.legend();

Evaluate the Model

In [19]:
x_pred = model.predict(X_train)
x_pred = x_pred.reshape(x_pred.shape[0], x_pred.shape[2])
x_pred = pd.DataFrame(x_pred, columns=['f1', 'f2', 'f3', 'f4'])
x_pred.index = train.index
In [20]:
score = pd.DataFrame(index=train.index)
Xtrain = X_train.reshape(X_train.shape[0], X_train.shape[2])
# Reconstruction error: mean absolute error per sample across the four features
score['Loss'] = np.mean(np.abs(x_pred - Xtrain), axis = 1)
In [21]:
sns.set(style='whitegrid', palette='muted')
rcParams['figure.figsize'] = 14, 8
#np.random.seed(1)
#tf.random.set_seed(1)

sns.distplot(score['Loss'], bins=50, kde=True);
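Note that sns.distplot is deprecated in recent seaborn releases; the modern equivalent call would be:

sns.histplot(score['Loss'], bins=50, kde=True)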

Task 6: Setting the Threshold Values

Before a machine fails, it ramps into a faulty mode. We flag the faulty mode with two signals:

1. Faulty Warning

This warns that the machine may be entering the faulty mode.

2. Faulty Alert

This indicates that the machine is already in the faulty mode; action must be taken to prevent it from entering the Failed state.

From the "Loss Distribution" above we can determine suitable threshold values for identifying the anomalies.
In [22]:
# Set the early-warning and critical-alert thresholds relative to the
# maximum reconstruction loss seen on the (normal) training data
warn = score['Loss'].max()
warn_thresh = (warn * 0.4).round(3)
alert_thresh = (warn_thresh * 1.4).round(3)
print('Faulty Warning : ' + str(warn_thresh))
print('Faulty Alert :   ' + str(alert_thresh))
Faulty Warning : 0.168
Faulty Alert :   0.235
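
The 0.4 and 1.4 factors are manual choices read off the loss distribution above. An alternative sketch (the percentile values here are assumptions, not tuned) derives the thresholds directly from the training-loss distribution:

warn_thresh_alt = round(float(np.percentile(score['Loss'], 95)), 3)   # assumed percentile
alert_thresh_alt = round(float(np.percentile(score['Loss'], 99)), 3)  # assumed percentile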

Plot the results to see how the reconstruction compares with the data. Note that the reconstruction is in z-scored units while train['f1'] is in the original denoised units, so compare the shape rather than the magnitude.

In [23]:
fig = plt.figure(figsize=(15,6))
plt.plot(train['f1'], label='Denoised Feature 1')
plt.plot(x_pred['f1'], label='Reconstruction of Feature 1 (z-scored)')
plt.xlabel('Data Range')
plt.ylabel('Feature Range')
plt.legend(loc='upper left')
Out[23]:
<matplotlib.legend.Legend at 0x20f0e04a550>

Task 7 : Detect the Anomalies in the Test data

In [24]:
X_test_pred = model.predict(X_test)
X_test_pred = X_test_pred.reshape(X_test_pred.shape[0], X_test_pred.shape[2])
X_test_pred = pd.DataFrame(X_test_pred, columns=['f1', 'f2', 'f3', 'f4'])
X_test_pred.index = test.index
In [25]:
score_test = pd.DataFrame(index=test.index)
Xtest = X_test.reshape(X_test.shape[0], X_test.shape[2])
score_test['Loss'] = np.mean(np.abs(X_test_pred - Xtest), axis = 1)
In [26]:
score_test['Time'] = test['Time']
score_test['f1'] = test['f1']
score_test['f2'] = test['f2']
score_test['f3'] = test['f3']
score_test['f4'] = test['f4']
score_test['warn_thresh'] = warn_thresh 
score_test['alert_thresh']= alert_thresh

score_test.loc[(score_test['Loss']>=warn_thresh)&(score_test['Loss']<alert_thresh),'Mode']='Faulty Warn'  
score_test.loc[(score_test['Loss']>=alert_thresh),'Mode']='Faulty Alert'
score_test.loc[(score_test['Loss']<warn_thresh),'Mode']='Normal'
score_test.head()
Out[26]:
Loss Time f1 f2 f3 f4 warn_thresh alert_thresh Mode
2100 0.087666 2020-12-01 05:36:06 0.000736 12.500009 12.501308 -0.000170 0.168 0.235 Normal
2101 0.061440 2020-12-01 13:36:16 0.000641 -0.000934 0.001762 -0.000429 0.168 0.235 Normal
2102 0.061439 2020-12-01 21:36:25 -0.000730 -0.000456 0.002275 0.000646 0.168 0.235 Normal
2103 0.064902 2020-12-02 05:36:35 -0.001390 12.498830 0.002295 0.000369 0.168 0.235 Normal
2104 0.064902 2020-12-02 13:36:45 -0.002638 12.499958 0.001514 -0.000394 0.168 0.235 Normal
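
Equivalently, the three chained .loc assignments above can be collapsed into a single np.select call; a minimal sketch:

conditions = [score_test['Loss'] >= alert_thresh,
              score_test['Loss'] >= warn_thresh]
score_test['Mode'] = np.select(conditions, ['Faulty Alert', 'Faulty Warn'], default='Normal')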

Plot the Data and Threshold

In [27]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=score_test.Time, y=score_test.Loss, mode='lines', name='Test loss'))
fig.add_trace(go.Scatter(x=score_test.Time, y=score_test.warn_thresh, mode='lines', name='Warning_Thresh'))
fig.add_trace(go.Scatter(x=score_test.Time, y=score_test.alert_thresh, mode='lines', name='Alert_Thresh'))
fig.update_layout(showlegend=True, title_text='Test Data and corresponding Thresholds',
                 xaxis_title="Time",
                 yaxis_title="Loss Range")
pyo.iplot(fig)
This graph visualizes the relationship between the loss and the thresholds. These loss thresholds filter out the abnormal entries in the data.
In [28]:
faulty = score_test[score_test['Mode']=='Faulty Warn']
failed =  score_test[score_test['Mode']=='Faulty Alert']

Plot to indicate the Faulty Period

The graph below shows the beginning of the faulty period. There are two indications.

1) Fault Warning -
    At the beginning of the faulty period, warnings are given (indicated by green markers).
2) Fault Alert -
    If the machine is not checked after the fault warnings, fault alerts may be generated as the machine condition degrades (indicated by red markers).
In [30]:
fig_fault = go.Figure()
# score_test carries the denoised features in their original units, so the
# faulty/failed points can be plotted directly (inverse-transforming them with
# a scaler fitted only on f1 would have distorted the values)
for f in ['f1', 'f2', 'f3', 'f4']:
    fig_fault.add_trace(go.Scatter(x=faulty.Time, y=faulty[f], mode='markers',
                                   marker=dict(size=10, color='green'), name='Fault Warning'))
    fig_fault.add_trace(go.Scatter(x=failed.Time, y=failed[f], mode='markers',
                                   marker=dict(size=10, color='red'), name='Fault Alert'))
fig_fault.update_layout(title_text='Time vs Fault warn', xaxis_title="Time",
                        yaxis_title="Feature Range", showlegend=False)
pyo.iplot(fig_fault)
In [31]:
score_test.to_csv('Prediction_output.csv')
We are showing data from Jan 2021. After about Jan 4, 2021, the signal begins to behave differently: the green markers are triggered as warnings and the red markers as alerts. Around June, a critical alert is triggered when the time series experiences a significant fluctuation. "Fault warnings" are triggered both before and after the "Fault alerts" because the time series keeps fluctuating after the critical fault alert.

The approach

In this anomaly-prediction approach we have used an LSTM autoencoder architecture. The reasons for using this model are as follows:
1) Train the model on machine data from the normal state, then detect anomalies on the test dataset through the reconstruction error.
2) The LSTM approach makes it easy to include multivariate features in the analysis. The computational performance of other approaches, such as Classification and Regression Trees or ARIMA, degrades as the number of features increases.
We have classified the anomalies into low and high alerts. The Faulty Warning points detect the start of the machine's Faulty state. If the machine is still unattended, the Faulty Alert points indicate a high alert: the machine needs immediate attention to avoid the Failed state.

Limitation

The performance of this model depends on the training data and on the denoising process. Improperly denoised training data may reduce the sensitivity of the anomaly prediction.

Other approaches

I would like to try anomaly detection using the autoencoder methods in H2O.

The generated output can be viewed in "Prediction_output.csv"; in this CSV the features (f1, f2, f3, f4) are denoised.
